import requests
from bs4 import BeautifulSoup
from time import sleep

def getHtmlPage():
    """Download the Sohu NBA player ranking page and return its HTML text.

    The page is requested with an HTTP POST using a browser-like
    User-Agent header, the response body is decoded as GBK, a copy is
    saved to ``NBA.html`` (GBK-encoded) in the current working directory,
    and the decoded text is returned.

    Returns:
        str: The response body decoded as GBK.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
    url = 'http://data.sports.sohu.com/nba/nba_players_rank.php'
    resp = requests.post(url=url, headers=headers)
    # Force GBK decoding instead of relying on the encoding requests detects.
    resp.encoding = 'GBK'
    html = resp.text
    # Use a context manager so the file is always flushed and closed; the
    # previous code referenced ``fo.close`` without calling it, so the handle
    # was never explicitly closed.
    with open('NBA.html', 'w', encoding='GBK') as fo:
        fo.write(html)
    return html

def parseHtml(pageText):
    """Extract the text of selected elements from HTML and save it to NBA.txt.

    Elements are matched by the CSS classes ``green01``, ``green02``,
    ``green03`` and ``white``. All matches for the first class are written
    first, then all matches for the second, and so on; each element's text
    is followed by a newline. The result is written to ``NBA.txt`` (UTF-8)
    in the current working directory, and a completion message is printed.

    Args:
        pageText: HTML markup to parse.
    """
    soup = BeautifulSoup(pageText, 'html.parser')

    # Order matters: output is grouped by selector in exactly this sequence.
    selectors = ('.green01', '.green02', '.green03', '.white')
    contentList = [
        element.text + "\n"
        for selector in selectors
        for element in soup.select(selector)
    ]

    # The context manager guarantees the file is closed even if writing fails.
    with open('NBA.txt', 'w', encoding='utf-8') as fo:
        fo.writelines(contentList)
    # Runtime message ("page crawl finished!") is kept verbatim.
    print('页面爬取完毕!')
    

if __name__ == '__main__':
    # Script entry point: fetch the ranking page, then parse it and save the
    # extracted text.
    parseHtml(getHtmlPage())




















    
